#### ======================================================================
#### Online appendix: Unweighted distribution plots by country
#### ----------------------------------------------------------------------
#### Purpose:
####   Visualize the distribution of respondents’ desired relationship
####   ratings with China and with the United States (1 to 10) separately
####   for each country in the study sample.
####
#### Data:
####   data/anonymized_survey.csv
####   Respondent-level (anonymized) survey file with country identifiers
####   and item responses for China_desired and USA_desired.
####
#### Output:
####   output/china_all_unweighted.pdf
####   output/usa_all_unweighted.pdf
####
#### Notes:
####   - These plots use raw (unweighted) counts.
####   - Facets use free y-scales to accommodate country-specific sample sizes.
####   - Country name harmonization ensures consistent labels across figures.
#### ======================================================================

library(dplyr)
library(ggplot2)
library(readr)

#### ----------------------------------------------------------------------
#### Paths and output directory
#### ----------------------------------------------------------------------

#### Input file and destination folder; both are relative paths.
out_dir     <- "output"
survey_path <- "data/anonymized_survey.csv"

#### Create the destination folder (and any missing parents) if needed.
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
}

#### ----------------------------------------------------------------------
#### Plot styling
#### ----------------------------------------------------------------------

#### Typography shared by both figures: the font family is passed to the
#### ggplot theme and to the PDF device; sizes are passed to the theme.
axis_text_size <- 9
base_size      <- 11
base_family    <- "Calibri"

#### ----------------------------------------------------------------------
#### Load data and harmonize country names
#### ----------------------------------------------------------------------
#### The recode step standardizes country labels used for faceting and for
#### country filtering below (e.g., "Democratic Republic of Congo" -> "DRC").
#### ----------------------------------------------------------------------

#### Read the respondent-level file (column-type messages suppressed).
plot_df <- read_csv(survey_path, show_col_types = FALSE)

#### Relabel the three country names that differ from the labels used in the
#### inclusion list; every other value of `country` is left as-is.
plot_df <- mutate(
  plot_df,
  country = recode(
    country,
    "Democratic Republic of Congo" = "DRC",
    "Turkey" = "Türkiye",
    "the Philippines" = "Philippines"
  )
)

#### ----------------------------------------------------------------------
#### Restrict to analysis countries and set common factor levels
#### ----------------------------------------------------------------------
#### The country list defines the set of panels shown in the appendix.
#### Desired relationship responses are coerced to a factor with levels 1–10
#### (listed in numeric order) so that each facet displays a complete and
#### comparable x-axis.
#### ----------------------------------------------------------------------

#### Countries shown in the appendix figures (labels after harmonization).
countries_to_include <- c(
  "Argentina", "Belarus", "Bolivia", "Brazil", "Burkina Faso",
  "Cameroon", "Colombia", "Croatia", "DRC", "Egypt", "Georgia",
  "Ghana", "Haiti", "Hungary", "Indonesia", "Kenya", "Lebanon",
  "Madagascar", "Mali", "Mexico", "Nigeria", "Peru", "Philippines",
  "Poland", "Romania", "Serbia", "Tunisia", "Türkiye", "Uruguay",
  "Venezuela"
)

#### Guard against silent panel loss: a listed country that does not occur in
#### the data (e.g., a spelling or encoding mismatch in the country label)
#### would otherwise simply be missing from the figures.
missing_countries <- setdiff(countries_to_include, unique(plot_df$country))
if (length(missing_countries) > 0) {
  warning(
    "Countries listed for inclusion but absent from the data: ",
    paste(missing_countries, collapse = ", "),
    call. = FALSE
  )
}

#### Keep only the listed countries and convert both rating items to factors
#### with the full set of levels 1-10. Any response value that is not one of
#### 1:10 becomes NA in the factor.
plot_df <- plot_df %>%
  filter(country %in% countries_to_include) %>%
  mutate(
    China_desired = factor(China_desired, levels = 1:10),
    USA_desired   = factor(USA_desired,   levels = 1:10)
  )

#### ----------------------------------------------------------------------
#### Helper: faceted bar plot of unweighted counts
#### ----------------------------------------------------------------------
#### This function produces a consistent layout for each outcome:
####   - x-axis: desired relationship rating (one category per rating level)
####   - y-axis: unweighted count within each country (free y-scale per facet)
####   - facets: one panel per value of `country`
####
#### Arguments:
####   data          Data frame containing `country` and the rating column.
####   x_var         Unquoted name of the rating column to plot.
####   x_label       Title for the x-axis.
####   rating_levels Rating values used as x-axis categories, in display
####                 order (default 1:10).
####   x_breaks      Rating values that receive an axis label
####                 (default 1, 5, 10).
####   family        Font family for the theme (default: `base_family`).
####   text_size     Base text size for the theme (default: `base_size`).
####   tick_size     Axis tick-label size; facet strip text is one point
####                 larger (default: `axis_text_size`).
####
#### Returns: a ggplot object.
####
#### The styling defaults are looked up from the script-level settings when
#### the function is called, so existing three-argument calls are unchanged;
#### passing them explicitly removes the dependency on those globals.
#### ----------------------------------------------------------------------

make_dist_plot <- function(data, x_var, x_label,
                           rating_levels = 1:10,
                           x_breaks      = c(1, 5, 10),
                           family        = base_family,
                           text_size     = base_size,
                           tick_size     = axis_text_size) {
  ggplot(data, aes(x = {{ x_var }})) +
    geom_bar(fill = "grey50") +
    facet_wrap(~ country, scales = "free_y") +
    # Fixed limits give every facet the same full set of x categories, even
    # where a rating level has no responses in that country.
    scale_x_discrete(
      limits = as.character(rating_levels),
      breaks = as.character(x_breaks)
    ) +
    labs(x = x_label, y = "Count") +
    theme_bw(base_size = text_size, base_family = family) +
    theme(
      axis.text.x      = element_text(size = tick_size, colour = "black"),
      axis.text.y      = element_text(size = tick_size, colour = "black"),
      strip.text       = element_text(size = tick_size + 1),
      panel.grid.major = element_line(colour = "grey85", linewidth = 0.3),
      panel.grid.minor = element_blank()
    )
}

#### ----------------------------------------------------------------------
#### Figure A: Desired relationship with China (unweighted distributions)
#### ----------------------------------------------------------------------

#### Build the per-country panels for the China item with the shared helper.
china_all_plot <- make_dist_plot(
  data    = plot_df,
  x_var   = China_desired,
  x_label = "Desired Relationship with China\n(1 = Not close at all, 10 = Very close)"
)

#### Save as PDF through cairo_pdf; the wrapper forwards the arguments that
#### ggsave() hands to the device and adds the font family. Width and height
#### are given in inches.
ggsave(
  file.path(out_dir, "china_all_unweighted.pdf"),
  china_all_plot,
  device = function(...) grDevices::cairo_pdf(..., family = base_family),
  width  = 950 / 96,
  height = 650 / 96,
  units  = "in",
  dpi    = 600
)

#### ----------------------------------------------------------------------
#### Figure B: Desired relationship with the United States (unweighted distributions)
#### ----------------------------------------------------------------------

#### Build the per-country panels for the United States item with the shared
#### helper.
usa_all_plot <- make_dist_plot(
  data    = plot_df,
  x_var   = USA_desired,
  x_label = "Desired Relationship with the United States\n(1 = Not close at all, 10 = Very close)"
)

#### Save as PDF through cairo_pdf; the wrapper forwards the arguments that
#### ggsave() hands to the device and adds the font family. Width and height
#### are given in inches.
ggsave(
  file.path(out_dir, "usa_all_unweighted.pdf"),
  usa_all_plot,
  device = function(...) grDevices::cairo_pdf(..., family = base_family),
  width  = 950 / 96,
  height = 650 / 96,
  units  = "in",
  dpi    = 600
)
